### supplemental material to:
### Michael Kloster, Daniel Langenkämper, Martin Zurowietz, Bánk Beszteri, Tim W. Nattkemper (2020)
### Deep learning-based diatom taxonomy on virtual slides


### please execute scripts 01 - 02 prior to this one!!!


### all the stuff concerning the CNN models

## required libraries
require(keras)
library(caret)
library(e1071)

## output directory for storing results, models etc.
## The value is read as a global variable by trainModel(), saveModelToDisk() and logResults().
## File names are built with paste0(outputDir, ...), so the trailing "/" is required.
outputDir <<- "results/"

## defining models
## Named list of the supported CNN architectures; every entry maps an architecture
## name onto itself, so that "ModelsList$<name>" yields the identifier expected by
## "defineModel()". Keep in sync with "defineModel()"!!!
ModelsList <- as.list(setNames(nm = c(
  "DenseNet",
  "InceptionResNetv2",
  "InceptionV3",
  "MobileNetV2",
  "ResNet50",
  "VGG16",                                       # VGG16 original implementation with 2 FC classification layers of 4096 neurons each
  "VGG16_1FC",                                   # VGG16 adapted implementation with 1 FC classification layer of 256 neurons
  "VGG16_2FC",                                   # VGG16 adapted implementation with 2 FC classification layers of 256 neurons each
  "VGG19",                                       # VGG19 original implementation with 2 FC classification layers of 4096 neurons each
  "VGG19_1FC",                                   # VGG19 adapted implementation with 1 FC classification layer of 256 neurons
  "VGG19_2FC",                                   # VGG19 adapted implementation with 2 FC classification layers of 256 neurons each
  "Xception"
)))

## functions

# function for defining a model based on a list of CNN architectures, for transfer learning
#
# The requested architecture is instantiated as a convolutional base with ImageNet weights
# and without its original top, extended by a new classification head, and then frozen so
# that the feature detection is not re-trained.
# Two kinds of heads are built:
#   - global average pooling, directly followed by the softmax layer "Predictions"
#   - flatten, followed by one or two fully connected ReLU layers and the softmax layer "Predictions"
# Every dense layer of the head (including "Predictions") uses a Glorot uniform kernel
# initializer with the fixed seed 123 and a zero bias initializer, so that the initial
# weights of the head are the same for every call, for all architectures alike.
#
# Returns a list with the elements "model", "classes", "inputShape", "optimizer" and "callbacksList".
# Stops with an error if "modelType" is not one of the implemented architectures.
defineModel <- function (
  modelType, # type of the model as defined in "ModelsList"
  classes # classes the model should classify
  )
{
  # the model type is compared with "==" below, this only is meaningful for a single value
  if(length(modelType) != 1 || is.na(modelType))
  {
    stop("'modelType' must be a single value as defined in 'ModelsList'!")
  }

  # description of an architecture whose head is: global average pooling -> softmax
  pooledHead <- function(size, base)
  {
    list(inputShape = c(size, size), base = base, pooled = TRUE, fcUnits = numeric(0), fcNames = character(0))
  }

  # description of an architecture whose head is: flatten -> fully connected layer(s) -> softmax
  flattenedHead <- function(size, base, fcUnits, fcNames)
  {
    list(inputShape = c(size, size), base = base, pooled = FALSE, fcUnits = fcUnits, fcNames = fcNames)
  }

  # dense layer with seeded kernel initialization, appended to "object"
  seededDense <- function(object, units, activation, name)
  {
    layer_dense(
      object,
      units = units,
      activation = activation,
      kernel_initializer = initializer_glorot_uniform(seed = 123),
      bias_initializer = "zeros",
      name = name
    )
  }

  # select the architecture; only the matching branch touches the respective keras application
  if(modelType == ModelsList$DenseNet) # DenseNet
  {
    spec <- pooledHead(224, application_densenet201)
  } else if(modelType == ModelsList$InceptionResNetv2) # Inception ResNet V2
  {
    spec <- pooledHead(299, application_inception_resnet_v2)
  } else if(modelType == ModelsList$InceptionV3) # Inception V3
  {
    spec <- pooledHead(299, application_inception_v3)
  } else if(modelType == ModelsList$MobileNetV2) # Mobile Net V2
  {
    spec <- pooledHead(224, application_mobilenet_v2)
  } else if(modelType == ModelsList$ResNet50) # ResNet50
  {
    spec <- pooledHead(224, application_resnet50)
  } else if(modelType == ModelsList$VGG16) # VGG16 with 2 FC layers of 4096 neurons each
  {
    spec <- flattenedHead(224, application_vgg16, fcUnits = c(4096, 4096), fcNames = c("fc1", "fc2"))
  } else if(modelType == ModelsList$VGG16_1FC) # VGG16 with small classification head, only 1 small FC layer
  {
    spec <- flattenedHead(224, application_vgg16, fcUnits = 256, fcNames = "fc1")
  } else if(modelType == ModelsList$VGG16_2FC) # VGG16 with small classification head, 2 small FC layers
  {
    spec <- flattenedHead(224, application_vgg16, fcUnits = c(256, 256), fcNames = c("fc1", "fc2"))
  } else if(modelType == ModelsList$VGG19) # VGG19 with 2 FC layers of 4096 neurons each
  {
    spec <- flattenedHead(224, application_vgg19, fcUnits = c(4096, 4096), fcNames = c("fc1", "fc2"))
  } else if(modelType == ModelsList$VGG19_1FC) # VGG19 with one small classification layer
  {
    spec <- flattenedHead(224, application_vgg19, fcUnits = 256, fcNames = "fc") # layer name "fc" kept as is
  } else if(modelType == ModelsList$VGG19_2FC) # VGG19 with two small classification layers
  {
    spec <- flattenedHead(224, application_vgg19, fcUnits = c(256, 256), fcNames = c("fc1", "fc2"))
  } else if(modelType == ModelsList$Xception) # Xception
  {
    spec <- pooledHead(299, application_xception)
  } else # unknown model type
  {
    stop(paste0("Model type '", modelType, "' is not implemented!")) ## complain that a not supported model was requested
  }

  # the number of rows of "classes" defines the number of output neurons
  nClasses <- nrow(classes)
  if(is.null(nClasses) || nClasses < 1)
  {
    stop("'classes' must be a table with at least one row!")
  }

  inputShape <- spec$inputShape # input shape for model/network (width, height)

  # initialize convolutional base, pre-trained on ImageNet data
  conv_base <- spec$base(weights = "imagenet", include_top = FALSE, input_shape = c(inputShape, 3))

  # expand convolutional base with own classification layer(s)
  model <- keras_model_sequential() %>% conv_base
  if(spec$pooled)
  {
    model <- model %>% layer_global_average_pooling_2d(name = "GlobalAveragePooling") # pooling layer
  } else
  {
    model <- model %>% layer_flatten(name = "flatten") # flatten output of convolutional base
  }
  for(j in seq_along(spec$fcUnits)) # fully connected classification layer(s), if any
  {
    model <- seededDense(model, units = spec$fcUnits[j], activation = "relu", name = spec$fcNames[j])
  }
  model <- seededDense(model, units = nClasses, activation = "softmax", name = "Predictions") # final classification layer

  freeze_weights(conv_base) # freeze convolutional base, we do not want to re-train the feature detection

  # use identical callbacks and optimizers for all models, regardless of original implementation
  callbacksList <- list() # no callbacks to be implemented
  optimizer <- optimizer_adam() # Adam optimizer with default settings

  # collect data to describe the model
  modelDescription <- list("model" = model, "classes" = classes, "inputShape" = inputShape, "optimizer" = optimizer, "callbacksList" = callbacksList)

  # talk about the model
  print (paste0("Model type '", modelType, "' sucessfully initialized:"))
  print(modelDescription)

  # return the model
  return(modelDescription)
}


# train (and evaluate) a CNN model
#
# Steps: optionally clear the keras session, shuffle all three data sets (reproducibly if
# "seed" is given), check that the first training image exists, optionally save the data
# sets and divert the console output to "<outputDir><projectName>.txt", define and compile
# the model, train it with augmented training data, optionally save it, and finally
# predict and evaluate the test data.
# The prediction and evaluation results are written to .RData files only if
# "saveConsoleOutput" is TRUE.
#
# Returns a list with the elements "ProjectName", "Predictions" (see testModel()) and
# "Evaluation" (see evaluatePredictions()).
# Relies on the global variable "outputDir" and on dataDLStatistics(), which is not
# defined in this script.
trainModel <- function (
  projectName, # the name of the project/experiment
  modelType,  # type of the model as defined in "ModelsList"
  dataTraining, # the training data set
  dataValidation, # the validation data set
  dataTest,  # the test/evaluation data set
  nEpochs, # number of epochs to train
  batchSize, # size of training data batches
  classes, # the classes this model should classify
  seed = NULL, # optional seed for initialization of random sampling
  saveModel = TRUE, # flag indicating if the trained model should be saved to disk
  saveData = TRUE, # flag indicating that the training/validation/test-data should be saved to disk
  saveConsoleOutput = TRUE, # flag indicating that the console output should be saved to disk
  clearGPU = TRUE # flag indicating if the GPU memory should be cleared before starting over with this experiment by destroying the current tensorflow graph 
)
{
  # clear GPU memory if requested, to set free any previously allocated tensorflow resources
  if(clearGPU)
  {
    k_clear_session()
  }
  
  # if requested, initialize random number generator 
  if(!is.null(seed))
  {
    set.seed(seed)
  }
  
  # shuffle data, using R sampling instead of shuffle feature of Keras' "flow_images_from_dataframe" to enable reproducible results
  dataTraining <- dataTraining[sample(nrow(dataTraining)),]
  dataValidation <- dataValidation[sample(nrow(dataValidation)),]
  dataTest <- dataTest[sample(nrow(dataTest)),]
  
  # test if image files exist, using the first entry of the training data set as example
  if(!file.exists(as.character(dataTraining$Imagefile[1])))
  {
    stop(paste0("Could not find image file '",dataTraining$Imagefile[1],"'.\nPlease download the respective data from the PANGAEA repository and copy it into the subfolder ./images/"))
  }
  
  # create output directory for storing results if necessary
  if(!dir.exists(outputDir))
  {
    dir.create(outputDir)
  }
  
  # save input data for documentation purposes if requested
  if(saveData)
  {
    write.csv2(dataTraining, file=paste0(outputDir, projectName,".data.training.csv"))
    write.csv2(dataValidation, file=paste0(outputDir, projectName,".data.validation.csv"))
    write.csv2(dataTest, file=paste0(outputDir, projectName,".data.test.csv"))
  }

  # save console output if requested
  if(saveConsoleOutput)
  {
    sink(paste0(outputDir, projectName,".txt"), split = TRUE)
    # release the diversion whenever this function is left, also if model definition,
    # training or evaluation fails; otherwise all later console output would keep
    # being written to this project's log file
    on.exit(sink(), add = TRUE)
  }
  
  # talk about project/experiment
  cat(paste0("\n\nProject '", projectName, "':\n"))
  
  # initialize model network architecture
  modelDefinition <- defineModel(modelType = modelType, classes = classes)
  
  if(is.null(modelDefinition)) # stop if model could not be initialized
  {
    stop(paste0("Model of type '", modelType, "' could not be initialized!"))
  }
  
  model <- modelDefinition$model # get the CNN to be trained
  inputShape <- modelDefinition$inputShape # get the CNN's input shape
  
  
  # training data
  # image data generator for training data, with massive augmentation
  train_datagen <- image_data_generator( # training images
    rescale = 1/255,
    rotation_range = 180,
    width_shift_range = 0.2,
    height_shift_range = 0.2,
    shear_range = 0.2,
    zoom_range = 0.2,
    horizontal_flip = TRUE,
    vertical_flip =  TRUE,
    fill_mode = "nearest"
  )
  
  # data generator for training data
  train_generator <- flow_images_from_dataframe(
    dataTraining, 
    directory = "", # path is included in dataTraining$Imagefile
    x_col="Imagefile", # the image file as training input
    y_col="ClassName", # the class of the image file
    generator = train_datagen,
    target_size = inputShape,
    color_mode = "rgb",
    class_mode = "categorical",
    shuffle = FALSE, # data already is shuffled with a fixed random seed, we do not want to shuffle it again to allow for repeatability of experiments
    batch_size = batchSize
  )
  
  # validation data
  # image data generator validation, no augmentation
  validation_datagen <- image_data_generator(rescale = 1/255)
  
  # data generator validation
  validation_generator <- flow_images_from_dataframe(
    dataValidation, 
    directory = "", # path is included in dataValidation$Imagefile
    x_col="Imagefile", # the image file as validation input
    y_col="ClassName", # the class of the image file
    generator = validation_datagen,
    target_size = inputShape,
    color_mode = "rgb",
    class_mode = "categorical",
    shuffle = FALSE, # data already is shuffled with a fixed random seed, we do not want to shuffle it again to allow for repeatability of experiments
    batch_size = batchSize
  )
  
  
  # talk about the data
  cat("Data:\n")
  cat(paste0("Classes: ", nrow(classes),"\n"))
  cat(paste0("\nTraining: ", nrow(dataTraining)," samples:\n"))
  print(dataDLStatistics(dataTraining, sumUp = TRUE)) # class counts
  cat(paste0("\nValidation: ", nrow(dataValidation)," samples:\n"))
  print(dataDLStatistics(dataValidation, sumUp = TRUE)) # class counts
  cat(paste0("\nTest: ", nrow(dataTest)," samples:\n"))
  print(dataDLStatistics(dataTest, sumUp = TRUE)) # class counts

  # compile the model
  cat("\nCompiling Model:\n")
  model %>% compile(
    loss = "categorical_crossentropy",
    optimizer = modelDefinition$optimizer,
    metrics = c("accuracy")
  )
  
  # train the model; the number of steps is rounded up so that a last, incomplete batch is used as well
  cat("\nStarting Training:\n")
  cat(paste0("Epochs: ", nEpochs, "\nBatch size: ", batchSize, "\n"))
  history <- model %>% fit_generator(
    train_generator,
    steps_per_epoch = ceiling(nrow(dataTraining)/train_generator$batch_size),
    epochs = nEpochs,
    validation_data = validation_generator,
    validation_steps = ceiling(nrow(dataValidation)/validation_generator$batch_size),
    callbacks = modelDefinition$callbacksList
  )
  
  # store model and training info
  if (saveModel)
  {
    saveModelToDisk(projectName, model = model, history = history)
  }

  # test model and gather evaluation statistics
  modelPredictions <- testModel(projectName, model = model, inputShape = inputShape, dataTest = dataTest) # get predictions
  modelEvaluation <- evaluatePredictions (modelPredictions$predictions, classIndices = modelPredictions$classIndices, testdataClasses = modelPredictions$actualClassIndices) # evaluate predictions
  
  # log the evaluation and store the results; the console diversion itself is released on exit
  if(saveConsoleOutput)
  {
    cat("\n\nModel evaluation:\n")
    print(modelEvaluation)
    save(modelPredictions, file = paste0(outputDir, projectName,".Predictions.RData"))
    save(modelEvaluation, file = paste0(outputDir, projectName,".Evaluation.RData"))
  }
  
  # return evaluation results
  return (list("ProjectName" = projectName, "Predictions" = modelPredictions, "Evaluation" = modelEvaluation))
}


# save a CNN model to disk
#
# Writes the model to "<outputDir><projectName>.h5" and, if a training history is
# supplied, its plot to "<outputDir><projectName>.pdf".
# Relies on the global variable "outputDir". Returns NULL invisibly.
saveModelToDisk <- function (
  projectName, # the name of this project/experiment
  model, # the model to save
  history = NULL # if supplied, the history plots will be saved as PDF
)
{
  save_model_hdf5(model, paste0(outputDir, projectName,".h5")) # save the model to disk
  
  # save the history plots
  if(!is.null(history))
  {
    pdf(file=paste0(outputDir, projectName,".pdf"))
    plotDevice <- dev.cur() # remember the PDF device just opened
    on.exit(dev.off(plotDevice), add = TRUE) # close it even if plotting fails, so that no device is left open
    print(plot(history))
  }
  
  invisible(NULL)
}


# apply a CNN model to test data
#
# The test images are fed to the model one by one, rescaled but not augmented and in
# the order given by "dataTest".
# Returns a list with the elements "predictions" (output of the model), "classIndices"
# (the generator's class indices) and "actualClassIndices" (the generator's classes).
testModel <- function(
  projectName, # the name of this project/experiment
  model, # the CNN model
  inputShape, # the input shape to convert the test data to
  dataTest # the test data
)
{
  # announce the evaluation
  cat(paste0("\n\nEvaluating trained model for project '", projectName, "':\n"))
  
  # generator feeding the test images, rescaling only
  plainImages <- image_data_generator(rescale = 1/255)
  test_generator <- flow_images_from_dataframe(
    dataTest,
    directory = "", # the path is part of dataTest$Imagefile
    x_col = "Imagefile", # column holding the image file
    y_col = "ClassName", # column holding the class of the image file
    generator = plainImages,
    target_size = inputShape,
    color_mode = "rgb",
    class_mode = "categorical",
    batch_size = 1, # predict a single image per step
    shuffle = FALSE # keep the order, so that predictions can be assigned to their images
  )
  
  # one step per batch
  nSteps <- nrow(dataTest)/test_generator$batch_size
  predictions <- predict_generator(model, test_generator, steps = nSteps)
  
  # hand back the predictions together with the class information of the generator
  list(
    "predictions" = predictions,
    "classIndices" = test_generator$class_indices,
    "actualClassIndices" = test_generator$classes
  )
}


# evaluate predictions by confusion matrix and statistic indices
#
# predictions      matrix of prediction values, one row per sample and one column per class
# classIndices     named list/vector of the classes, the names are used as class names
# testdataClasses  actual class ids of the samples, counted from 0
#
# Returns a list with the elements
#   "confusionMatrix"  result of caret's confusionMatrix()
#   "statistics"       data.frame with TP, FP, FN, precision, recall and F1 per class
#   "macro"            macro averaged precision, recall, F1 and F1 according to Sokolova & Lapalme
#   "micro"            micro averaged precision, recall and F1
# A warning is issued for every class without any true positive.
evaluatePredictions <- function(predictions, classIndices, testdataClasses)
{
  # per sample, get the id of the class with the highest prediction value
  # (class id = index - 1, keras starts counting at 0);
  # which.max() returns exactly one index, on ties the first one, so that every
  # sample is assigned to exactly one class
  predictedClasses <- apply(predictions, 1, which.max) - 1L
  
  # factorize classes, with one level per class so that classes which were not predicted are kept
  classIds <- 0:(ncol(predictions)-1)
  predictedClassesFactors <- factor(predictedClasses, classIds)
  testdataClassesFactors <- factor(testdataClasses, classIds)
  
  # calculate confusion matrix, requires package "caret"; rows = predicted, columns = actual classes
  confusionResult <- confusionMatrix(predictedClassesFactors, testdataClassesFactors, mode = "prec_recall" )
  confusionTable <- confusionResult$table

  # count true positives (TP), false positives (FP), false negatives (FN) per class
  truePositives <- as.integer(diag(confusionTable)) # predicted class = actual class
  falsePositives <- as.integer(rowSums(confusionTable)) - truePositives # rest of the row of the predicted class
  falseNegatives <- as.integer(colSums(confusionTable)) - truePositives # rest of the column of the actual class
  
  statistics <- data.frame("class" = 0:(nrow(confusionTable)-1), "className" = names(classIndices), "TP" = truePositives, "FP" = falsePositives, "FN" = falseNegatives)
  statistics$precision <- statistics$TP / (statistics$TP + statistics$FP) # calculate precision per class
  statistics$recall <- statistics$TP / (statistics$TP + statistics$FN) # calculate recall per class
  # NOTE(review): recall is NaN for a class without any test sample (TP + FN = 0) and is
  # not replaced, so macro$recall and macro$F1.Sokolova_Lapalme become NaN in that case;
  # please confirm that this is intended

  # warning for classes that have not been predicted correctly at all
  notPredicted <- statistics[statistics$TP==0,]
  if (nrow(notPredicted)>0)
  {
    warning(paste("No prediction of class(es):", paste(notPredicted$className, collapse = ", ")), immediate. = TRUE)
  }
  
  # workaround for classes that have not been predicted, set precision = 0 to enable further calculations
  statistics$precision[is.nan(statistics$precision)] <- 0
  
  # calculate F1 per class, workaround if F1 could not be calculated: set it to 0
  statistics$F1 <- 2 * statistics$precision * statistics$recall / (statistics$precision + statistics$recall)
  statistics$F1[is.nan(statistics$F1)] <- 0
  
  # calculate macro averaged prediction performance
  macro <- list()
  macro$precision <- sum(statistics$precision)/nrow(statistics) # macro averaged precision
  macro$recall <- sum(statistics$recall)/nrow(statistics) # macro averaged recall
  macro$F1 <- sum(statistics$F1)/nrow(statistics) # macro averaged F1
  macro$F1.Sokolova_Lapalme <- 2 * macro$precision * macro$recall / (macro$precision + macro$recall) # macro averaged F1 according to Sokolova & Lapalme
  
  # calculate micro averaged prediction performance
  micro <- list()
  micro$precision <- sum(statistics$TP) / (sum(statistics$TP) + sum (statistics$FP)) # micro averaged precision
  micro$recall <- sum(statistics$TP) / (sum(statistics$TP) + sum (statistics$FN)) # micro averaged recall
  micro$F1 <- 2 * micro$precision * micro$recall / (micro$precision + micro$recall) # micro averaged F1

  # return evaluation data
  return (list("confusionMatrix" = confusionResult, "statistics" = statistics, "macro" = macro, "micro" = micro))
}


# train CNN model with k-fold cross validation
#
# The data are split into k folds by splitDLDataKFold(). For each fold, a model is
# trained by trainModel() on the other folds (split into training and validation data
# by splitDLData()) and tested on this fold. splitDLDataKFold() and splitDLData() are
# not defined in this script.
#
# Returns a list with the elements "ProjectName", "FromMultipleResults" (always TRUE),
# "AverageMacroF1", "AverageMicroF1", "experimentFoldsPredictions" and
# "experimentFoldsEvaluations" (one entry per fold each).
# Stops with an error if "k" is not a single number >= 1.
trainModelkfold <-  function (
  projectName, # the name of this project/experiment
  modelType, # type of the model as defined in "ModelsList"
  data, # the complete data used for training/validation/testing, will be split accordingly in this function
  k, # number of folds for k-fold cross validation
  portionTrain, # portion of each fold to be used for training, the rest will be used for validation
  nEpochs, # number of epochs to train
  batchSize, # size of training data batches
  classes, # the classes this model should classify
  seed = NULL # seed for initializing random sampling
)
{
  # without at least one fold there is nothing to train, and the averages below would not be defined
  if(!is.numeric(k) || length(k) != 1 || is.na(k) || k < 1)
  {
    stop("'k' must be a single number >= 1!")
  }
  
  # talk about the project/experiment
  cat(paste0("\n\nProject '", projectName, "':\n"))
  cat(paste0("Training with ",k,"-fold cross validation\n"))
  
  # prepare statistics
  folds <- seq_len(k) # the folds to iterate over
  foldsPredictions <- vector("list", length(folds)) # storage for prediction data of each fold's model
  foldsEvaluations <- vector("list", length(folds)) # storage for evaluation data of each fold's model
  sum.macroF1 <- 0 # sum of macro F1 of all folds
  sum.microF1 <- 0 # sum of micro F1 of all folds
  
  # augment data with folds, the fold of each sample is stored in column "k"
  data.folds <- splitDLDataKFold(data, k, seed = seed)
  
  # perform cross-validation
  for(i in folds) # iterate over folds
  {
    projectNameFold <- paste0(projectName, ".fold_",i)
    
    data.sets <- splitDLData(data.folds[data.folds$k!=i,], portionTrain = portionTrain, seed = seed) # join k-1 folds to generate training and validation data
    data.sets$test <- data.folds[data.folds$k==i,] # take 1 fold for test data
    
    # train model on k-1 folds, test on 1 fold    
    modelFoldEvaluation <- trainModel(projectNameFold, modelType = modelType, dataTraining =  data.sets$train, dataValidation =  data.sets$validate, dataTest =  data.sets$test, nEpochs =  nEpochs, batchSize =  batchSize, classes =  classes, seed = seed, saveModel = TRUE)
    
    # get predictions and evaluation of this fold's model
    pred <- modelFoldEvaluation$Predictions
    eval <- modelFoldEvaluation$Evaluation
    eval$ProjectName <- projectNameFold
    
    foldsPredictions[[i]] <- pred # store prediction data of this fold's model
    foldsEvaluations[[i]] <- eval # store evaluation data of this fold's model
    
    # sum up F1
    sum.macroF1 <- sum.macroF1 + eval$macro$F1
    sum.microF1 <- sum.microF1 + eval$micro$F1
  }
  
  # calculate average F1 over all folds
  avg.macroF1 <- sum.macroF1/k
  avg.microF1 <- sum.microF1/k
  
  # return results
  return (list("ProjectName" = projectName, "FromMultipleResults" = TRUE, "AverageMacroF1" = avg.macroF1, "AverageMicroF1" = avg.microF1, "experimentFoldsPredictions" = foldsPredictions, "experimentFoldsEvaluations" = foldsEvaluations))
}


# train (and evaluate) a CNN model n times (as replicates)
#
# Each run calls trainModel() with the same data and settings under the project name
# "<projectName>.run_<i>"; the flags "saveModel", "saveData", "saveConsoleOutput" and
# "clearGPU" are passed on to every run.
#
# Returns a list with the elements "ProjectName", "FromMultipleResults" (always TRUE),
# "AverageMacroF1", "AverageMicroF1", "experimentFoldsPredictions" and
# "experimentFoldsEvaluations" (one entry per run each).
# Stops with an error if "nRuns" is not a single number >= 1.
trainModelnTimes <- function (
  projectName, # the name of the project/experiment
  modelType,  # type of the model as defined in "ModelsList"
  nRuns, # number of replicates of the model to execute
  dataTraining, # the training data
  dataValidation, # the validation data
  dataTest,  # the test/evaluation data
  nEpochs, # number of epochs to train
  batchSize, # size of training data batches
  classes, # the classes this model should classify
  seed = NULL, # optional seed for initialization of random sampling
  saveModel = TRUE, # flag indicating if the trained model should be saved to disk
  saveData = TRUE, # flag indicating that the training/validation/test-data should be saved to disk
  saveConsoleOutput = TRUE, # flag indicating that the console output should be saved to disk
  clearGPU = TRUE # flag indicating if the GPU memory should be cleared before starting over with this experiment by destroying the current tensorflow graph 
)
{
  # without at least one run there is nothing to train, and the averages below would not be defined
  if(!is.numeric(nRuns) || length(nRuns) != 1 || is.na(nRuns) || nRuns < 1)
  {
    stop("'nRuns' must be a single number >= 1!")
  }
  
  # talk about the project
  cat(paste0("\n\nProject '", projectName, "':\n"))
  cat(paste0("Running ",nRuns," times for averaging\n"))
  
  # prepare statistics
  runs <- seq_len(nRuns) # the runs to iterate over
  runsPredictions <- vector("list", length(runs)) # storage for prediction data of each run's model
  runsEvaluations <- vector("list", length(runs)) # storage for evaluation data of each run's model
  sum.macroF1 <- 0 # sum of macro F1 of all runs
  sum.microF1 <- 0 # sum of micro F1 of all runs
  
  # perform runs
  for(i in runs) # iterate over runs
  {
    projectNameRun <- paste0(projectName, ".run_",i)
    
    # train model, handing over the flags requested by the caller
    modelRunEvaluation <- trainModel(
      projectNameRun,
      modelType = modelType,
      dataTraining = dataTraining,
      dataValidation = dataValidation,
      dataTest = dataTest,
      nEpochs = nEpochs,
      batchSize = batchSize,
      classes = classes,
      seed = seed,
      saveModel = saveModel,
      saveData = saveData,
      saveConsoleOutput = saveConsoleOutput,
      clearGPU = clearGPU
    )
    
    # get predictions and evaluation of this run's model
    pred <- modelRunEvaluation$Predictions
    eval <- modelRunEvaluation$Evaluation
    eval$ProjectName <- projectNameRun
    
    runsPredictions[[i]] <- pred # store prediction data of this run's model
    runsEvaluations[[i]] <- eval # store evaluation data of this run's model
    
    # sum up F1
    sum.macroF1 <- sum.macroF1 + eval$macro$F1
    sum.microF1 <- sum.microF1 + eval$micro$F1
  }
  
  # calculate average F1  
  avg.macroF1 <- sum.macroF1/nRuns
  avg.microF1 <- sum.microF1/nRuns
  
  # return results
  return (list("ProjectName" = projectName, "FromMultipleResults" = TRUE, "AverageMacroF1" = avg.macroF1, "AverageMicroF1" = avg.microF1, "experimentFoldsPredictions" = runsPredictions, "experimentFoldsEvaluations" = runsEvaluations))
  
}

# function for logging evaluation results for a bunch of experiments
#
# The results are collected in the global data.frames "Results.AllExperiments" (one row
# per experiment) and "Results.IndividualRuns" (one row per trained model). Call with
# init = TRUE once to create both data.frames before logging any results.
# After each logged experiment, both data.frames are written to timestamped CSV files
# in "outputDir" (global variable).
# Returns NULL invisibly.
logResults <- function ( 
  model = "", # type of the model as defined in ModelsList
  evaluationData = NULL, # result of trainModel(), trainModelkfold() or trainModelnTimes()
  init = FALSE # flag indicating if the data.frames "Results.AllExperiments" and "Results.IndividualRuns" should be initialized
)
{
  if(init) # initialize logging
  {
    # create empty data.frames for collecting results
    Results.AllExperiments <<- data.frame(Project.Name = character(), Model = character(), Macro.F1 = numeric(), Micro.F1 = numeric(), ValuesAveraged = logical())
    Results.IndividualRuns <<-  data.frame(Project.Name = character(), Model = character(), Macro.F1 = numeric(), Micro.F1 = numeric())
    return(invisible(NULL))
  }
  
  # log data
  if(is.null(evaluationData))
  {
    stop("'evaluationData' must be supplied if 'init' is FALSE!")
  }
  
  if("FromMultipleResults" %in% names(evaluationData)) # averaged result from k-fold crossvalidation or replicate runs
  {
    # add averaged results to global variable "Results.AllExperiments"
    Results.AllExperiments <<- rbind(Results.AllExperiments, data.frame(Project.Name = evaluationData$ProjectName, Model = model, Macro.F1 = evaluationData$AverageMacroF1, Micro.F1 = evaluationData$AverageMicroF1, ValuesAveraged = TRUE))
    
    # add individual results (one row per fold or replicate run) to global variable "Results.IndividualRuns";
    # iterating over the list itself also copes with an empty list of individual results
    individualRows <- lapply(evaluationData$experimentFoldsEvaluations, function(individualEvaluation)
    {
      data.frame(
        Project.Name = individualEvaluation$ProjectName, Model = model, 
        Macro.F1 = individualEvaluation$macro$F1, Micro.F1 = individualEvaluation$micro$F1
      )
    })
    Results.IndividualRuns <<- do.call(rbind, c(list(Results.IndividualRuns), unname(individualRows)))
    
  } else 
  { # result from a single run experiment
    Results.AllExperiments <<- rbind(Results.AllExperiments, data.frame(Project.Name = evaluationData$ProjectName, Model = model, 
      Macro.F1 = evaluationData$Evaluation$macro$F1, Micro.F1 = evaluationData$Evaluation$micro$F1, ValuesAveraged = FALSE))
    Results.IndividualRuns <<- rbind(Results.IndividualRuns, data.frame(Project.Name = evaluationData$ProjectName, Model = model, 
      Macro.F1 = evaluationData$Evaluation$macro$F1, Micro.F1 = evaluationData$Evaluation$micro$F1))
  }
  
  # save all results retrieved so far to disk, the file names contain the current time
  ts <- format(Sys.time(),"%Y%m%d%H%M%S")
  write.csv2(Results.AllExperiments, file = paste0(outputDir,"Results.AllExperiments.",ts,".csv"))
  write.csv2(Results.IndividualRuns, file = paste0(outputDir,"Results.IndividualRuns.",ts,".csv"))
  
  invisible(NULL)
}


